
# Functions --------------------------------------------------------------------
source("scripts/0_functions/functions_cleaning.R")

# Packages ---------------------------------------------------------------------
#if packages are not installed, they must be installed with:
# install.packages("package_name"); The name of package must be in quotes
library(dplyr)
library(readr)

#preprocessing done to original market scoping data file:
#change column wendors_thur_wetseasn to vendors_thur_wetseasn
#change mktname Kawonekela to Kawonekera
#change mktname Mwema to Mwima
#add in longitude (33.4708) and latitude (-12.141) for Embangweni (from Google Maps)
#add n_lowturnout_months which counts how many months of low turnout were indicated

# Load Data --------------------------------------------------------------------
# Read the market scoping data (from the raw data folder) and the treatment
# groups table (from the clean data folder).
mkt_scoping_full <- "data/1_raw/market_scoping_data.csv" %>%
  read_csv()
treatment_groups <- "data/2_clean/treatment_groups.csv" %>%
  read_csv()


# Cleaning ---------------------------------------------------------------------
# Recode the district name "Mzimba" to "M'mbelwa". `%in%` is used for the
# comparison because it never returns NA, so rows with a missing district
# name are left untouched.
mkt_scoping_full <- mkt_scoping_full %>%
  mutate(distrname = replace(distrname, distrname %in% "Mzimba", "M'mbelwa"))

# Merge in the treatment indicator, matching mktname to Market.
# relationship = "many-to-one" makes the join stop with an error if a market
# name appears more than once in treatment_groups, instead of silently
# duplicating rows of the scoping data.
mkt_scoping_full <- mkt_scoping_full %>%
  left_join(
    treatment_groups %>% select(Market, treatment_status),
    by = join_by(mktname == Market),
    relationship = "many-to-one"
  )

# Creating additional helpful tax collector variables.
# Step 1: pass each staffing count through NAto0() and store the result in a
# new column with the suffix "_0s" (NAto0 comes from the sourced functions
# file; presumably it replaces NA with 0 -- confirm there).
# Step 2: build totals from the "_0s" columns.
mkt_scoping_full <- mkt_scoping_full %>%
  mutate(
    across(
      c(TaxCollectors_filled, RevenueCollectors_filled, other_pos_w_col_resp,
        unfilled_rc_positions, unfilled_tc_positions),
      NAto0,
      .names = "{.col}_0s"
    )
  ) %>%
  mutate(
    # filled plus unfilled positions, per role
    TaxCollectors_total =
      TaxCollectors_filled_0s + unfilled_tc_positions_0s,
    RevenueCollectors_total =
      RevenueCollectors_filled_0s + unfilled_rc_positions_0s,
    # all filled positions with collection responsibility
    all_tax_pers_filled =
      TaxCollectors_filled_0s + RevenueCollectors_filled_0s +
      other_pos_w_col_resp_0s,
    # all positions (filled and unfilled) with collection responsibility
    all_tax_pers =
      TaxCollectors_total + RevenueCollectors_total + other_pos_w_col_resp_0s
  )

# Sample-only version of the data: keep rows whose market name appears in the
# treatment groups table and whose samplewithLL value equals 1.
mkt_scoping_samp <- filter(
  mkt_scoping_full,
  mktname %in% treatment_groups$Market,
  samplewithLL == 1
)


# Vendor number estimates as a weighted daily average, built in three stages.
mkt_scoping_samp <- mkt_scoping_samp %>%
  # Stage 1: weekly vendor totals per season. The rowSums() versions ignore
  # missing days (na.rm = TRUE); the "_old" versions use plain addition and
  # are therefore NA whenever any day is missing.
  mutate(
    tot_ven_dry = rowSums(
      pick(v_dry_mon, v_dry_tues, v_dry_wed, v_dry_thur,
           v_dry_fri, v_dry_sat, v_dry_sun),
      na.rm = TRUE
    ),
    tot_ven_wet = rowSums(
      pick(vendors_mon_wetseasn, vendors_tues_wetseasn,
           vendors_wed_wetseasn, vendors_thur_wetseasn,
           vendors_fri_wetseasn, vendors_sat_wetseasn,
           vendors_sun_wetseasn),
      na.rm = TRUE
    ),
    tot_ven_dry_old = v_dry_mon + v_dry_tues + v_dry_wed +
      v_dry_thur + v_dry_fri + v_dry_sat + v_dry_sun,
    tot_ven_wet_old = vendors_mon_wetseasn + vendors_tues_wetseasn +
      vendors_wed_wetseasn + vendors_thur_wetseasn +
      vendors_fri_wetseasn + vendors_sat_wetseasn +
      vendors_sun_wetseasn
  ) %>%
  # Stage 2: daily averages, dividing each weekly total by 7.
  mutate(
    avg_ven_dry = tot_ven_dry/7,
    avg_ven_wet = tot_ven_wet/7,
    avg_ven_dry_old = tot_ven_dry_old/7,
    avg_ven_wet_old = tot_ven_wet_old/7
  ) %>%
  # Stage 3: combine the seasons, weighting the wet-season average by
  # n_lowturnout_months/12 and the dry-season average by the remaining share.
  mutate(
    ven_daily_wt_old = avg_ven_dry_old * ((12 - n_lowturnout_months)/12) +
      avg_ven_wet_old * (n_lowturnout_months/12),
    ven_daily_wt = avg_ven_dry * ((12 - n_lowturnout_months)/12) +
      avg_ven_wet * (n_lowturnout_months/12)
  )

# Making vendor estimates as weighted average of max per dry and wet week
# (opt 1).
#
# The busiest day of each season's week is taken with pmax() over the seven
# day-of-week columns, named explicitly (the same columns that feed the weekly
# totals) rather than matched by substring or column position, so unrelated
# columns can never be picked up by accident.
# NOTE(review): this assumes the previous contains("v_dry")[, 1:7] and
# contains("_wetseasn") selections resolved to exactly these columns -- confirm
# against the raw data.
#
# With na.rm = TRUE, pmax() ignores missing days and returns NA when every day
# in the row is missing (the earlier apply(max) approach produced -Inf with a
# warning in that case).
mkt_scoping_samp <- mkt_scoping_samp %>%
  mutate(
    ven_dry_max = pmax(v_dry_mon, v_dry_tues, v_dry_wed, v_dry_thur,
                       v_dry_fri, v_dry_sat, v_dry_sun,
                       na.rm = TRUE),
    ven_wet_max = pmax(vendors_mon_wetseasn, vendors_tues_wetseasn,
                       vendors_wed_wetseasn, vendors_thur_wetseasn,
                       vendors_fri_wetseasn, vendors_sat_wetseasn,
                       vendors_sun_wetseasn,
                       na.rm = TRUE),
    # wet-season maximum weighted by n_lowturnout_months/12, dry-season
    # maximum by the remaining share of the year
    ven_max_wt = ven_dry_max * ((12 - n_lowturnout_months) / 12) +
      ven_wet_max * (n_lowturnout_months / 12)
  )

# Sort the sample data by market name.
mkt_scoping_samp <- arrange(mkt_scoping_samp, mktname)

# Save -------------------------------------------------------------------------
# Write each data frame to its own file in the clean data folder.
save(
  list = "mkt_scoping_samp",
  file = "data/2_clean/mkt_scoping_sample.RDATA"
)
save(
  list = "mkt_scoping_full",
  file = "data/2_clean/mkt_scoping_full.RDATA"
)


